#!/bin/sh

# Wikipedia data locations. A value already present in the environment wins;
# an unset or empty variable is assigned the default given here.
: "${WIKIPEDIA_DATA_DIR:=/groups/corpora/wikipedia}"
: "${WIKIPEDIA_AUX_DATA_DIR:=$HOME/devel/wikipedia}"

# TR-CoNLL dev directory; assigned unconditionally to a fixed absolute path.
TRCONLL_DEV_DIR='/groups/projects/pcl_travel/data/trconlldevtest/dev'

# GeoText directories. The input and output directories both hang off
# GEOTEXT_TOP_DIR, which lives under the user's home directory.
GEOTEXT_TOP_DIR="${HOME}/devel/geotext"
GEOTEXT_INPUT_DIR="${GEOTEXT_TOP_DIR}/GeoText.2010-10-12/processed_data"
GEOTEXT_OUTPUT_DIR="${GEOTEXT_TOP_DIR}/output"

# Base name of the Wikipedia dump. Switch to the "-permuted" variant when its
# pages-articles archive exists in WIKIPEDIA_DATA_DIR, unless NO_USE_PERMUTED
# is set to a non-empty value.
DUMP_PREFIX="enwiki-20100905"
if [ -z "$NO_USE_PERMUTED" ] &&
   [ -e "${WIKIPEDIA_DATA_DIR}/${DUMP_PREFIX}-permuted-pages-articles.xml.bz2" ]; then
  DUMP_PREFIX="${DUMP_PREFIX}-permuted"
fi

# Prefix shared by every file derived from the pages-articles dump.
PAGES_ARTICLES_PREFIX="${DUMP_PREFIX}-pages-articles"

# Short aliases for the two Wikipedia directories.
AUXDIR="${WIKIPEDIA_AUX_DATA_DIR}"
WDIR="${WIKIPEDIA_DATA_DIR}"

# File-name suffixes that are appended to a dump prefix with a "-" separator.
ORIG_ARTICLE_DATA_SUFFIX='article-data.txt'
COMBINED_ARTICLE_DATA_SUFFIX='combined-article-data.txt'
COORDS_SUFFIX='coords.txt'
COUNTS_SUFFIX='counts-only-coord-articles.txt'
LINKS_SUFFIX='links-only-coord-articles.txt'
TOPONYM_EVAL_SUFFIX='toponym-eval.txt'

# File-name suffixes that are appended to the pages-articles prefix with a
# "." separator.
DISAMBIG_ID_SUFFIX='disambig.id.txt'
TITLE2ID_SUFFIX='title2id.txt'
DUMP_SUFFIX='xml.bz2'

# Bare file names (no directory component).

# Fixed names that do not depend on the dump.
OUT_STOPWORDS_FILE='stopwords.english'
OUT_GAZETTEER_FILE='world-dataen-fixed.txt'

# "<dump prefix>-<suffix>" names.
OUT_ORIG_ARTICLE_DATA_FILE="${DUMP_PREFIX}-${ORIG_ARTICLE_DATA_SUFFIX}"
OUT_COMBINED_ARTICLE_DATA_FILE="${DUMP_PREFIX}-${COMBINED_ARTICLE_DATA_SUFFIX}"
OUT_COORDS_FILE="${DUMP_PREFIX}-${COORDS_SUFFIX}"
OUT_COUNTS_FILE="${DUMP_PREFIX}-${COUNTS_SUFFIX}"
OUT_LINKS_FILE="${DUMP_PREFIX}-${LINKS_SUFFIX}"
OUT_TOPONYM_EVAL_FILE="${DUMP_PREFIX}-${TOPONYM_EVAL_SUFFIX}"

# "<pages-articles prefix>.<suffix>" names.
OUT_DISAMBIG_ID_FILE="${PAGES_ARTICLES_PREFIX}.${DISAMBIG_ID_SUFFIX}"
OUT_TITLE2ID_FILE="${PAGES_ARTICLES_PREFIX}.${TITLE2ID_SUFFIX}"
OUT_DUMP_FILE="${PAGES_ARTICLES_PREFIX}.${DUMP_SUFFIX}"

# Full paths: each OUT_* file name prefixed with the directory it is read from.

# Files located in the auxiliary directory.
IN_STOPWORDS_FILE="${AUXDIR}/${OUT_STOPWORDS_FILE}"
IN_GAZETTEER_FILE="${AUXDIR}/${OUT_GAZETTEER_FILE}"

# Files located in the main Wikipedia data directory.
IN_ORIG_ARTICLE_DATA_FILE="${WDIR}/${OUT_ORIG_ARTICLE_DATA_FILE}"
IN_COMBINED_ARTICLE_DATA_FILE="${WDIR}/${OUT_COMBINED_ARTICLE_DATA_FILE}"
IN_COORDS_FILE="${WDIR}/${OUT_COORDS_FILE}"
IN_COUNTS_FILE="${WDIR}/${OUT_COUNTS_FILE}"
IN_LINKS_FILE="${WDIR}/${OUT_LINKS_FILE}"
IN_TOPONYM_EVAL_FILE="${WDIR}/${OUT_TOPONYM_EVAL_FILE}"
IN_DISAMBIG_ID_FILE="${WDIR}/${OUT_DISAMBIG_ID_FILE}"
IN_TITLE2ID_FILE="${WDIR}/${OUT_TITLE2ID_FILE}"
IN_DUMP_FILE="${WDIR}/${OUT_DUMP_FILE}"

# Ready-made "--option path" strings, one per input file. Each holds the flag
# and its path in a single space-separated string.
# NOTE(review): presumably expanded unquoted by callers so the flag and the
# path split into two words; a path containing whitespace would split too.
STOPWORDS_ARG="--stopwords-file ${IN_STOPWORDS_FILE}"
GAZETTEER_ARG="--gazetteer-file ${IN_GAZETTEER_FILE}"

# Both article-data variants use the same --article-data-file option.
ORIG_ARTICLE_DATA_ARG="--article-data-file ${IN_ORIG_ARTICLE_DATA_FILE}"
COMBINED_ARTICLE_DATA_ARG="--article-data-file ${IN_COMBINED_ARTICLE_DATA_FILE}"

COORDS_ARG="--coords-file ${IN_COORDS_FILE}"
COUNTS_ARG="--counts-file ${IN_COUNTS_FILE}"
LINKS_ARG="--links-file ${IN_LINKS_FILE}"
# There is deliberately no TOPONYM_EVAL_ARG: the toponym eval file is used
# with --evalfile instead.
DISAMBIG_ID_ARG="--disambig-id-file ${IN_DISAMBIG_ID_FILE}"
TITLE2ID_ARG="--title2id-file ${IN_TITLE2ID_FILE}"

# Include local configuration if it exists.
#
# The file is sourced last, so anything it assigns replaces the values
# computed earlier in this file.
#
# The include is skipped when TEXTGROUNDER_PYTHON is unset or empty; otherwise
# the path would collapse to "/local-config" and a file at the filesystem root
# would be probed and sourced. Only a regular file is accepted, because "." on
# a directory is an error.

if [ -n "${TEXTGROUNDER_PYTHON:-}" ] &&
   [ -f "${TEXTGROUNDER_PYTHON}/local-config" ]; then
  . "${TEXTGROUNDER_PYTHON}/local-config"
fi

